### MAKE SURE YOU CHANGE THE DIRECTORY TO THE ONE CONTAINING THIS FILE

library(RWeka)
library(reshape2)
library(ggplot2)
library(plyr)
library(quanteda)
library(scales)
library(grid)   # install from https://cran.r-project.org/src/contrib/Archive/grid/
library(texreg)
library(Hmisc)
library(pvsR)
library(rio)
library(sjPlot)
library(foreign)
library(car)
library(MASS)

##SET THE WORKING DIRECTORY HERE
setwd("~/Downloads/PSRMReplications/")

##Load the personality scores
##NOTE: It takes weeks to generate the scores on a supercomputer. 
##If you want to generate this manually, you will need a separate archive
load("BootstrapHouseJackknife.Rda")


# code to adjust plotting axes (to get bottom axes on fifth panel)

facetAdjust <- function(x, pos = c("up", "down"))
{
  pos <- match.arg(pos)
  p <- ggplot_build(x)
  gtable <- ggplot_gtable(p); dev.off()
  dims <- apply(p$panel$layout[2:3], 2, max)
  nrow <- dims[1]
  ncol <- dims[2]
  panels <- sum(grepl("panel", names(gtable$grobs)))
  space <- ncol * nrow
  n <- space - panels
  if(panels != space){
    idx <- (space - ncol - n + 1):(space - ncol)
    gtable$grobs[paste0("axis_b",idx)] <- list(gtable$grobs[[paste0("axis_b",panels)]])
    if(pos == "down"){
      rows <- grep(paste0("axis_b\\-[", idx[1], "-", idx[n], "]"), 
                   gtable$layout$name)
      lastAxis <- grep(paste0("axis_b\\-", panels), gtable$layout$name)
      gtable$layout[rows, c("t","b")] <- gtable$layout[lastAxis, c("t")]
    }
  }
  class(gtable) <- c("facetAdjust", "gtable", "ggplot"); gtable
}

print.facetAdjust <- function(x, newpage = is.null(vp), vp = NULL) {
  if(newpage)
    grid.newpage()
  if(is.null(vp)){
    grid.draw(x)
  } else {
    if (is.character(vp)) 
      seekViewport(vp)
    else pushViewport(vp)
    grid.draw(x)
    upViewport()
  }
  invisible(x)
}


# STABILITY OVER TIME PLOT
ds <- melt(meanPars,
           id.var=c("icpsr","Year",colnames(meanPars)[grep("(^l.*|^u.*)",colnames(meanPars))]))

ds$CI_l <- 0
ds$CI_u <- 0
for (i in 1:nrow(ds)){
  ds$CI_l[i] <- ds[i,paste("l",ds$variable[i],sep="")]
  ds$CI_u[i] <- ds[i,paste("u",ds$variable[i],sep="")]
}



ds$member <- NA
ds$member <- replace(ds$member, ds$icpsr==29137, "Boehner")
ds$member <- replace(ds$member, ds$icpsr==15448, "Pelosi")
ds$member <- replace(ds$member, ds$icpsr==14873, "Hoyer")
ds$member <- replace(ds$member, ds$icpsr==20144, "Cantor")
ds$member <- replace(ds$member, ds$icpsr==14290, "Paul")



### FIGURE 2
gTH <- ggplot(subset(ds, ds$icpsr %in% c(29137,15448,14873,14290,20144)), 
              aes(Year, value, fill=member)) +
          geom_line(size=1) +
          geom_point(size=3, aes(shape=member)) +
          geom_ribbon(aes(ymin=CI_l, ymax=CI_u, fill=member), alpha=0.3) +
          facet_wrap(~variable, nrow=2) +
          scale_x_continuous(breaks=seq(1996, 2014,by=2)) +
          scale_shape_discrete(name = "House Members\n") +
          scale_fill_grey(name = "House Members\n", start = 0.1, end = 0.5) +
          theme_bw() +
          theme(axis.text.x=element_text(angle=45,hjust=1)) +
          theme(panel.margin = unit(0.75, "lines")) + 
          ylab("Jackknifed Scores (with 95% CI)\n") + 
          xlab("\nYear")

gTH.adj <- facetAdjust(gTH, pos = "up")
print(gTH.adj)  
ggsave(gTH.adj, file="HouseScoresTH.pdf", height=8, width=10)




### WORDCOUNT PLOT
### read texts and do Word Count, match to the data frame

## set up empty holders
res_house <- NULL

#House
for (i in c(1996:2014)){
  tmp <- readLines(paste("House-",i,".txt",sep=""))
  id <- tmp[grep("Features for", tmp)]
  icpsr <- gsub("Features for |.txt...", "", id)
  feature_holder <- NULL
  for (j in 2:85){
    feature_tmp <- tmp[grep("Features for", tmp)+j]
    holder1 <- unlist(strsplit(feature_tmp, " "))[seq(1,2*length(id),by=2)]
    holder2 <- as.numeric(unlist(strsplit(feature_tmp, " "))[seq(2,2*length(id),by=2)])
    feature_holder <- cbind(feature_holder,holder2)
    colnames(feature_holder)[j-1] <- holder1[1]
  }
  feature_holder <- data.frame(icpsr, year=i, feature_holder)
  res_house <- rbind(res_house, feature_holder)
  print(i)
}


#only keep the word count
res <- data.frame(icpsr=res_house$icpsr, year=res_house$year, WC=res_house$WC)

W <- ddply(res, .(icpsr, year), summarise, WC=sum(WC))

ds$matching <- paste(ds$icpsr,ds$Year)
W$matching <- paste(W$icpsr,W$year)

ds <- merge(ds, W, by.x="matching", by.y="matching")

##plot WC by parameter
ds <- subset(ds, !is.na(CI_l))

g <- ggplot(ds, aes(WC,CI_u-CI_l)) +
  facet_wrap(~variable) +
  geom_point() +
  stat_smooth() +
  geom_hline(aes(yintercept=1, colour="red")) +
  geom_vline(aes(xintercept=10000, colour="red")) +
  xlab("Word Count") +
  ylab("Confidence Interval Width") +
  theme_bw()

asinh_trans <- function(){
  trans_new(name = 'asinh', transform = function(x) asinh(x), 
            inverse = function(x) sinh(x))
}

ds$variable <- factor(ds$variable, labels = c("Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Emotional Stability"))
ds$CIW <- NA
ds[ds$variable == "Openness",]$CIW <- mean(ds[ds$variable == "Openness",]$CI_u - ds[ds$variable == "Openness",]$CI_l)
ds[ds$variable == "Conscientiousness",]$CIW <- mean(ds[ds$variable == "Conscientiousness",]$CI_u - ds[ds$variable == "Conscientiousness",]$CI_l)
ds[ds$variable == "Extraversion",]$CIW <- mean(ds[ds$variable == "Extraversion",]$CI_u - ds[ds$variable == "Extraversion",]$CI_l)
ds[ds$variable == "Agreeableness",]$CIW <- mean(ds[ds$variable == "Agreeableness",]$CI_u - ds[ds$variable == "Agreeableness",]$CI_l)
ds[ds$variable == "Emotional Stability",]$CIW <- mean(ds[ds$variable == "Emotional Stability",]$CI_u - ds[ds$variable == "Emotional Stability",]$CI_l)




### FIGURE 3
g2 <- ggplot(ds, aes(WC,CI_u-CI_l)) +
  facet_wrap(~variable, ncol = 3) +
  geom_point(alpha = 0.3, color = "grey") +
  stat_smooth(col = "black", fill = "grey20") +
  geom_hline(aes(yintercept=CIW), linetype="dashed") +
  geom_vline(aes(xintercept=round(median(ds[ds$variable == "Openness",]$WC))), linetype="dashed") +
  xlab("\nWord Count\n(Hyperbolic Arcsin Transform)") +
  ylab("Confidence Interval Width\n(Hyperbolic Arcsin Transform)\n") +
  scale_y_continuous(trans = 'asinh', breaks = c(0, 1, 2, 3, 4, 5, 6, 7), limits = c(0, 7)) + 
  scale_x_continuous(trans = 'asinh', breaks = c(10, 100, 1000, 10000, 100000, 1000000), limits = c(10, 1000000)) +  
  theme_bw() + 
  theme(axis.text.x=element_text(angle=45,hjust=1)) +
  theme(panel.margin = unit(1.25, "lines"))

g2.adj <- facetAdjust(g2, pos = "up")
print(g2.adj)  
ggsave(g2.adj, file="WCTransformed.pdf", height=6, width=8.5)






#### FEATURE TABLES
load("BootstrapHouseJackknife.Rda")


# extract selected traits to make plots over time
numincong <- NULL

social.cors <- NULL # EXTRAVERSION
social.ses <- NULL # EXTRAVERSION

future.cors <- NULL # CONSCIENTIOUSNESS
future.ses <- NULL # CONSCIENTIOUSNESS

anger.cors <- NULL # AGREEABLENESS
anger.ses <- NULL # AGREEABLENESS

totalwords.cors <- NULL # EXTRAVERSION
totalwords.ses <- NULL # EXTRAVERSION

sixltrs.cors <- NULL # OPENNESS
sixltrs.ses <- NULL # OPENNESS

anxiety.cors <- NULL # EMOTIONAL STABILITY
anxiety.ses <- NULL # EMOTIONAL STABILITY


for (i in c(1996:2014)){
  meanParsfoo <- meanPars[meanPars$Year == i,]
  meanParsfoo$icpsr <- as.numeric(as.character(meanParsfoo$icpsr))
  meanParsfoo <- meanParsfoo[,c(1,13:17)]
  
  
  foo.liwc <- read.arff(paste("House-",i,".arff", sep = ""))  # testing format
  foo.liwc$filename <- gsub(paste("/Users/Adam/Desktop/Speeches/House/",i,"/", sep = ""),"", foo.liwc$filename)
  foo.liwc$filename <- gsub(".txt","", foo.liwc$filename)
  foo.liwc$filename <- as.numeric(foo.liwc$filename)
  colnames(foo.liwc)[1] <- "icpsr"
  foo.liwc <- foo.liwc[,-c(86:90)]
  
  merged.data.foo <- merge(meanParsfoo, foo.liwc, by = "icpsr")
  
  cors.foo <- rcorr(as.matrix(merged.data.foo[,-c(1,8,12,13,14,20)]))[[1]][,1:5]
  p.vals.foo <- rcorr(as.matrix(merged.data.foo[,-c(1,8,12,13,14,20)]))[[3]][,1:5]
  
  assign(paste("cors.",i,sep=""), cors.foo)
  assign(paste("p.vals.",i,sep=""), p.vals.foo)
  
  
  coef.names <- c("Openn",
                  "Consc",
                  "Extra",
                  "Agree",
                  "Emoti",
                  "Age of Acquisition", 
                  "Concreteness", 
                  "Familiarity", 
                  "Imagability", 
                  "Meaningfulness (Colorado)", 
                  "Meaningfulness (Paivo)", 
                  "Number of Letters", 
                  "Number of Phonemes", 
                  "Number of Syllables", 
                  "Word Count", 
                  "Words Per Sentence", 
                  "Sentences Ending With ?", 
                  "Unique Words", 
                  "Dictionary Words", 
                  "Words > 6 Letters", 
                  "Abbreviations", 
                  "Emoticons", 
                  "Total Pronouns", 
                  "First-Person Singular", 
                  "First-Person Plural", 
                  "Total First Person", 
                  "Total Second Person", 
                  "Total Third Person", 
                  "Negations", 
                  "Assents", 
                  "Articles", 
                  "Prepositions", 
                  "Numbers", 
                  "Affective Processes", 
                  "Positive Emotions", 
                  "Positive Feelings", 
                  "Optimism and Energy", 
                  "Negative Emotions", 
                  "Anxiety or Fear", 
                  "Anger", 
                  "Sadness or Depression", 
                  "Cognitive Processes", 
                  "Causation", 
                  "Insight", 
                  "Discrepancy", 
                  "Inhibition", 
                  "Tentative", 
                  "Certainty", 
                  "Sensory and Perceptual Processes", 
                  "Seeing", 
                  "Hearing", 
                  "Feeling", 
                  "Social Processes", 
                  "Communication", 
                  "Other References to People", 
                  "Humans", 
                  "Time", 
                  "Past Tense Verb", 
                  "Present Tense Verb", 
                  "Future Tense Verb", 
                  "Space", 
                  "Up", 
                  "Down", 
                  "Inclusive", 
                  "Exclusive", 
                  "Motion", 
                  "Achievement", 
                  "Religion", 
                  "Sex and Sexuality", 
                  "Swear Words", 
                  "Nonfluencies", 
                  "Fillers", 
                  "Number of Periods", 
                  "Number of Commas", 
                  "Number of Colons", 
                  "Number of Semicolons", 
                  "Number of Question Marks", 
                  "Number of Exclamations", 
                  "Number of Dashes", 
                  "Number of Quotation Marks", 
                  "Number of Apostrophes", 
                  "Number of Pairs of Parentheses", 
                  "Number of Other Punctuation", 
                  "All Punctuation")
  
  tex.foo <- texreg(list(createTexreg(coef.names = rownames(cors.foo), coef = cors.foo[,1], pvalues = p.vals.foo[,1], se = as.numeric(rep(NA, length(cors.foo[,1])))),
                         createTexreg(coef.names = rownames(cors.foo), coef = cors.foo[,2], pvalues = p.vals.foo[,2], se = as.numeric(rep(NA, length(cors.foo[,2])))),
                         createTexreg(coef.names = rownames(cors.foo), coef = cors.foo[,3], pvalues = p.vals.foo[,3], se = as.numeric(rep(NA, length(cors.foo[,3])))),
                         createTexreg(coef.names = rownames(cors.foo), coef = cors.foo[,4], pvalues = p.vals.foo[,4], se = as.numeric(rep(NA, length(cors.foo[,4])))),
                         createTexreg(coef.names = rownames(cors.foo), coef = cors.foo[,5], pvalues = p.vals.foo[,5], se = as.numeric(rep(NA, length(cors.foo[,5]))))), 
                    omit.coef = "(Openn)|(Agree)|(Emoti)|(Consc)|(Extra)",
                    stars = c(0.01, 0.05, 0.1), digits = 3,
                    custom.model.names = c("Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Emotional Stability"),
                    custom.coef.names = coef.names,
                    caption = paste("Correlations Between LIWC/MRC Features and Jackknifed Big Five Traits (",i,")", sep = ""),
                    label = paste("features",i,sep=""),
                    caption.above = TRUE)
  
  assign(paste("tex.",i,sep=""), tex.foo)
  
  
  numincong <- c(numincong, dim(merged.data.foo)[1])
  social.cors <- c(social.cors, get(paste("cors.",i, sep = ""))[53,3])
  social.ses <- c(social.ses, sqrt((1-(get(paste("cors.",i, sep = ""))[53,3])^2)/(dim(merged.data.foo)[1] - 2)))
  
  future.cors <- c(future.cors, get(paste("cors.",i, sep = ""))[60,2])
  future.ses <- c(future.ses, sqrt((1-(get(paste("cors.",i, sep = ""))[60,2])^2)/(dim(merged.data.foo)[1] - 2)))
  
  anger.cors <- c(anger.cors, get(paste("cors.",i, sep = ""))[40,4])
  anger.ses <- c(anger.ses, sqrt((1-(get(paste("cors.",i, sep = ""))[40,4])^2)/(dim(merged.data.foo)[1] - 2)))
  
  sixltrs.cors <- c(sixltrs.cors, get(paste("cors.",i, sep = ""))[20,1])
  sixltrs.ses <- c(sixltrs.ses, sqrt((1-(get(paste("cors.",i, sep = ""))[20,1])^2)/(dim(merged.data.foo)[1] - 2)))
  
  totalwords.cors <- c(totalwords.cors, get(paste("cors.",i, sep = ""))[15,3])
  totalwords.ses <- c(totalwords.ses, sqrt((1-(get(paste("cors.",i, sep = ""))[15,3])^2)/(dim(merged.data.foo)[1] - 2)))
  
  anxiety.cors <- c(anxiety.cors, get(paste("cors.",i, sep = ""))[39,5])
  anxiety.ses <- c(anxiety.ses, sqrt((1-(get(paste("cors.",i, sep = ""))[39,5])^2)/(dim(merged.data.foo)[1] - 2)))
  
}


validity.frame <- data.frame(low = c(sixltrs.cors - qnorm(0.975)*sixltrs.ses,
                                     future.cors - qnorm(0.975)*future.ses,
                                     totalwords.cors - qnorm(0.975)*totalwords.ses,
                                     anger.cors - qnorm(0.975)*anger.ses,
                                     anxiety.cors - qnorm(0.975)*anxiety.ses),
                             mid = c(sixltrs.cors,
                                     future.cors,
                                     totalwords.cors,
                                     anger.cors,
                                     anxiety.cors),
                             high = c(sixltrs.cors + qnorm(0.975)*sixltrs.ses,
                                     future.cors + qnorm(0.975)*future.ses,
                                     totalwords.cors + qnorm(0.975)*totalwords.ses,
                                     anger.cors + qnorm(0.975)*anger.ses,
                                     anxiety.cors + qnorm(0.975)*anxiety.ses),
                             trait = c(sort(rep(1:5, 19))),
                             year = rep(1996:2014, 5))

validity.frame$trait <- factor(validity.frame$trait, labels = c("Openness and Six-Letter Words",
                                                                "Conscientiousness and Future Tense Words",
                                                                "Extraversion and Total Words",
                                                                "Agreeableness and Anger Words",
                                                                "Emotional Stability and Anxious Words"))



validity.plot <- ggplot(validity.frame, aes(x = year, y= mid, shape = trait)) + 
                    geom_line(size = 1) +
                    geom_point(size = 3, aes(shape = trait)) + 
                    geom_ribbon(aes(ymin = low, ymax = high, fill = trait, linetype = NA), alpha = 0.3) + 
                    theme_bw() + 
                    scale_fill_grey(name = "Personality Trait-Feature Pairing", start = 0.01, end = 0.75) +
                    scale_shape_discrete(name = "Personality Trait-Feature Pairing") +
                    theme(axis.text.x=element_text(angle=45,hjust=1)) + 
                    scale_x_continuous(breaks=seq(1996,2014,by=2), name = "\nYear") + 
                    scale_y_continuous(breaks = seq(-1, 1, by = 0.25), limits = c(-1,1), name = "Yearly Correlation Between Personality Trait and Feature\n") + 
                    scale_color_discrete(name = "Personality Trait-Feature Pairing") + 
                    geom_hline(yintercept = 0, linetype = "dashed")
            
### FIGURE 4                    
ggsave(validity.plot, file="TraitLIWCPlot.pdf", height=6, width=8)


### FEATURE TABLES FOR APPENDIX
tex.1996  # TABLE A-1
tex.1997  # TABLE A-2
tex.1998  # TABLE A-3
tex.1999  # TABLE A-4
tex.2000  # TABLE A-5
tex.2001  # TABLE A-6
tex.2002  # TABLE A-7
tex.2003  # TABLE A-8
tex.2004  # TABLE A-9
tex.2005  # TABLE A-10
tex.2006  # TABLE A-11
tex.2007  # TABLE A-12
tex.2008  # TABLE A-13
tex.2009  # TABLE A-14
tex.2010  # TABLE A-15
tex.2011  # TABLE A-16
tex.2012  # TABLE A-17
tex.2013  # TABLE A-18
tex.2014  # TABLE A-19









### NPAT ANALYSIS
load("HouseNPATwithPers.Rda")

vm <- import("VoldenMinozzi.dta")

covars <- ddply(vm, .(icpsr), summarise,
                dem=mean(dem), maj=mean(maj), dpres=mean(dpres),
                south=mean(south), female=mean(female),
                ideol_ext=mean(ideological_extremism), seniority=mean(seniority),
                votepct=mean(votepct), leader=mean(leader),
                ideology=mean(ideal_partyfree),
                state=state[1])

morecovariates <- merge(covars,res)


mod <- glm(votesmart_respond ~ 
             (Openn + Consc + Extra + Agree + Emoti) +
             dpres + south + female + ideol_ext + 
             seniority + votepct + leader + maj, 
           data=morecovariates,
           family=binomial, x=TRUE)

mod2 <- glm(votesmart_respond ~ 
             (Openn + Consc + Extra + Agree + Emoti), 
           data=morecovariates,
           family=binomial, x=TRUE)

mod3 <- glm(votesmart_respond ~ 
             (Openn + Consc + Extra + Agree + Emoti) +
             dpres + south + female  + 
             seniority + votepct + leader , 
           data=morecovariates,
           family=binomial, x=TRUE)

mod4 <- glm(votesmart_respond ~ 
             (Openn + Consc + Extra + Agree + Emoti) +
             dpres + south + female + ideol_ext + 
             seniority + votepct + leader , 
           data=morecovariates,
           family=binomial, x=TRUE)


### TABLE 2
texreg(list(mod2,mod3,mod4,mod), stars = c(0.01,0.05,0.1), digits = 2,
       custom.coef.names = c("Constant", "Openness", "Conscientiousness", "Extravwersion",
                             "Agreeableness", "Emotional Stability", "Democratic Normal Vote",
                             "South", "Female", "Seniority", "Legislator Voteshare$_{t-1}$",
                             "Leader", "Ideological Extremity", "Majority Party"),
       caption = "Predicting Response to the NPAT At Least Once",
       label = "NPATtable",
       caption.above = TRUE)

#now, we will vary Emotional Stability
means <- apply(mod$x[,-1],2,mean)

mA <- mean(morecovariates$Emoti,na.rm=TRUE)
sdA <- sd(morecovariates$Emoti,na.rm=TRUE)
emoti_vary <- seq(mA-2*sdA, mA+2*sdA, length=101)

newdata <- data.frame(t(means[-grep("Emoti",names(means))]), "Emoti"=emoti_vary)

newdata$preds <- predict(mod, newdata=newdata, type="response", se.fit=TRUE)$fit
newdata$se <- predict(mod, newdata=newdata, type="response", se.fit=TRUE)$se

newdata$Emoti <- (newdata$Emoti-mA)/sdA

g <- ggplot(newdata, aes(Emoti,preds)) + 
  geom_line() + 
  geom_ribbon(aes(xmin=Emoti,xmax=Emoti,ymin=preds-1.65*se,ymax=preds+1.65*se,
                  linetype=NA), alpha=0.3) +
  scale_fill_grey(start = 0.0, end = 0.6) +
  xlab("\nEmotional Stability\n(Number of Standard Deviations from Mean)") +
  ylab("Predicted Rate of Response to NPAT\n") +
  theme_bw()


### FIGURE 5
ggsave(g, file="npat_prob.pdf", height = 4.5, width = 6.75)










# DW-NOMINATE AND LIFETIME MEANS
# load the personality lifetime data
lifetimeMeans <- read.dta("lifetimeMeansHouse.dta")

# ensure that the icpsr variable is numeric and not a factor
lifetimeMeans$icpsr <- as.numeric(as.character(lifetimeMeans$icpsr))

# load the common space data
CS <- read.dta("Commonspace - WITH NOTHING ELSE.dta")

# load biographical data for age/year and gender
bios <- read.dta("congressbios.dta")

# trim to relevant variables
bios <- bios[,1:4]

# subset to the House and relevant variables
CS.trim <- CS[CS$senate == 0,c(1, 3, 5, 12)]

# merge
CS.pers <- merge(CS.trim, lifetimeMeans, by = "icpsr")
CS.pers <- merge(CS.pers, bios, by = "icpsr")

# unique combinations
CS.pers <- unique(CS.pers)

# regress Common Space on personality
mod1 <- lm(dwnom1 ~ lifeOpen + lifeConsc + lifeExtra + lifeAgree + lifeEmoti, data = CS.pers)
mod2 <- lm(dwnom1 ~ lifeOpen + lifeConsc + lifeExtra + lifeAgree + lifeEmoti + I(gender == "F"), data = CS.pers)
mod3 <- lm(dwnom1 ~ lifeOpen + lifeConsc + lifeExtra + lifeAgree + lifeEmoti + birthyear, data = CS.pers)
mod4 <- lm(dwnom1 ~ lifeOpen + lifeConsc + lifeExtra + lifeAgree + lifeEmoti + I(gender == "F") + birthyear, data = CS.pers)


modList <- list(mod1, mod2, mod3, mod4)


### TABLE 1
texreg(modList, stars = c(0.01, 0.05, 0.1), digits = 3,
       custom.coef.names = c("Constant", "Openness", "Conscientiousness",
                             "Extraversion", "Agreeableness", "Emotional Stability",
                             "Female", "Birth Year"),
       caption = "OLS Models of Personality and Common Space Scores (1996-2014)",
       caption.above = TRUE,
       label = "OLS")
       
       
       








### PENNEBAKER CORPUS
leg <- import("hou114.csv")[,-1]
ess <- import("essays.csv")[,-1]

##mean across individuals
leg_mean <- apply(leg,2,mean)
ess_mean <- apply(ess,2,mean)

##combine the data and remove Word Count
uber <- cbind(leg_mean,ess_mean)
colnames(uber) <- c("Legislators","Essays")

##add in ranks
ranks <- cbind(rank(leg_mean), rank(ess_mean))
colnames(ranks) <- c("Legislators","Essays")

##big frame
allstuff <- rbind(data.frame(uber, Correlation = "Word Count"),
                  data.frame(ranks, Correlation = "Feature Usage Rank Ordering"))

##plot it making the asin transformation
asinh_trans <- function(){
  trans_new(name = 'asinh', transform = function(x) asinh(x), 
            inverse = function(x) sinh(x))
}


gWo <- ggplot(data.frame(uber[-2,]), aes(Legislators,Essays)) + 
  #geom_text(label=rownames(uber)[-2],size=5) + 
  geom_point(alpha = 0.3) +
  scale_y_continuous(trans = 'asinh') + 
  scale_x_continuous(trans = 'asinh') + 
  theme_bw() + 
  stat_smooth(method="lm", fill = "grey30", colour = "black") +
  geom_text(aes(5,25,label="Correlation = 0.99"),colour="black") +
  xlab("\nHouse Member Mean LIWC Usage in 2014\n(Hyperbolic Arcsin Transform)") +
  ylab("Pennebaker and King Corpus Mean LIWC Usage\n(Hyperbolic Arcsin Transform)")

gWi <- ggplot(data.frame(uber[-2,]), aes(Legislators,Essays)) + 
  geom_text(label=rownames(uber)[-2],size=5, alpha = 0.3) + 
  #geom_point() +
  scale_y_continuous(trans = 'asinh') + 
  scale_x_continuous(trans = 'asinh') + 
  theme_bw() + 
  stat_smooth(method="lm", fill = "grey30", colour = "black") +
  geom_text(aes(5,25,label="Correlation = 0.99"),colour="black") +
  xlab("\nHouse Member Mean LIWC Usage in 2014\n(Hyperbolic Arcsin Transform)") +
  ylab("Pennebaker and King Corpus Mean LIWC Usage\n(Hyperbolic Arcsin Transform)")

# FIGURE 1
ggsave(gWo, file="EssaysVsSpeeches_nolabels.pdf",
       height=6, width=8)

# APPENDIX FIGURE
ggsave(gWi, file="EssaysVsSpeeches_labels.pdf",
       height=6, width=8)

       
       
       
       
       
       

# BILL PROPOSALS
Bills <- read.dta("BillProposalCounts.dta")
Bills <- Bills[,c(1:3)]
colnames(Bills)[1] <- "id"

bioinfo <- read.dta("congressbios.dta")
bioinfo <- bioinfo[,c(1,3)]
colnames(bioinfo)[1] <- "id"

Bills <- merge(Bills, bioinfo, by = "id")
Bills$year <- 1789+(Bills$cong - 1)*2 + 1 # age at midpoint of congress
Bills$age <- Bills$year - Bills$birthyear

# volden-wiseman independent variables
# NOTE THAT IN THE VW DATA, votepct IS THE PERCENTAGE OF VOTE RECEIVED TO ENTER THE CURRENT CONGRESS! 
VW <- read.csv("LEPData93to112Congresses.csv")
VW <- VW[,-c(1,2)]
colnames(VW)[c(1:2)] <- c("id", "cong")

Bills <- merge(Bills, VW, by = c("id", "cong"))

# NOTE THAT IN THE VW DATA, votepct IS THE PERCENTAGE OF VOTE RECEIVED TO ENTER THE CURRENT CONGRESS! 

# loads the meanPars element
new_ests <- load(file = "BootstrapHouseJackknife_byCong.Rda")
colnames(meanPars)[1] <- "id"
meanParsTrim <- meanPars[,c(1,2,13:17)]
proposal_data <- merge(Bills, meanParsTrim, by = c("id", "cong"))

### bipartisan cosponsorship; save as different object due to different timespan
HMdata <- read.dta("HM_Cosponsorships.dta")
HMproposal_data <- merge(proposal_data, HMdata, by = c("cong", "id"))


cerem.mod.1 <- glm(cbind(c_bills, s_bills + ss_bills) ~ Openn + Consc + Extra + Agree + Emoti, data = proposal_data, family = "binomial")
cerem.mod.2 <- glm(cbind(c_bills, s_bills + ss_bills) ~ Openn + Consc + Extra + Agree + Emoti + majority + dwnom1 + I(dwnom1^2), data = proposal_data, family = "binomial")
cerem.mod.3 <- glm(cbind(c_bills, s_bills + ss_bills) ~ Openn + Consc + Extra + Agree + Emoti + majority + dwnom1 + I(dwnom1^2) + power + age + female + votepct + speaker + chair + subchr + seniority + maj_leader + min_leader, data = proposal_data, family = "binomial")
cerem.mod.4 <- glm(cbind(c_bills, s_bills + ss_bills) ~ (Openn + Consc + Extra + Agree + Emoti)*majority + dwnom1 + I(dwnom1^2) + power + age + female + votepct + speaker + chair + subchr + seniority + maj_leader + min_leader, data = proposal_data, family = "binomial")
cerem.mod.5 <- glm(cbind(c_bills, s_bills + ss_bills) ~ (Openn + Consc + Extra + Agree + Emoti)*majority + dwnom1 + I(dwnom1^2) + power + age + female + votepct + speaker + chair + subchr + seniority + maj_leader + min_leader + factor(cong), data = proposal_data, family = "binomial")


# TABLE 3
texreg(list(cerem.mod.1, cerem.mod.2, cerem.mod.3, cerem.mod.4, cerem.mod.5), digits = 2, stars = c(0.01, 0.05, 0.1),
       omit.coef = "factor",
       custom.coef.names = c("Constant", "Openness", "Conscientiousness", "Extraversion",
                             "Agreeableness", "Emotional Stability", "Majority Party",
                             "Ideology", "Ideological Extremism", "Power Committee", "Age",
                             "Female", "Legislator Voteshare$_{t-1}$", "Speaker", 
                             "Committee Chair", "Subcommittee Chair", "Seniority",
                             "Majority Leadership", "Minority Leadership",
                             "Majority Party $\\times$ Openness",
                             "Majority Party $\\times$ Conscientiousness",
                             "Majority Party $\\times$ Extraversion",
                             "Majority Party $\\times$ Agreeableness",
                             "Majority Party $\\times$ Emotional Stability",
                             "factor105", "factor106", "factor107", "factor108",
                             "factor109", "factor110", "factor111", "factor112"),
       caption = "Personality and Ceremonial Bill Proposals",
       label = "ceremonialtable",
       caption.above = TRUE)                             
   
linearHypothesis(cerem.mod.1, c("Openn = 0", "Emoti = 0", "Extra = 0", "Consc = 0", "Agree = 0"))
linearHypothesis(cerem.mod.2, c("Openn = 0", "Emoti = 0", "Extra = 0", "Consc = 0", "Agree = 0"))
linearHypothesis(cerem.mod.3, c("Openn = 0", "Emoti = 0", "Extra = 0", "Consc = 0", "Agree = 0"))
linearHypothesis(cerem.mod.4, c("Openn = 0", "Emoti = 0", "Extra = 0", "Consc = 0", "Agree = 0", "Openn:majority = 0", "Consc:majority = 0", "Extra:majority = 0", "Agree:majority = 0", "Emoti:majority = 0"))
linearHypothesis(cerem.mod.5, c("Openn = 0", "Emoti = 0", "Extra = 0", "Consc = 0", "Agree = 0", "Openn:majority = 0", "Consc:majority = 0", "Extra:majority = 0", "Agree:majority = 0", "Emoti:majority = 0"))
   

consc_range <- seq(mean(proposal_data$Consc, na.rm = TRUE) - 2*sd(proposal_data$Consc, na.rm = TRUE),
                   mean(proposal_data$Consc, na.rm = TRUE) + 2*sd(proposal_data$Consc, na.rm = TRUE),
                   length.out = 101)


consc.cerem.preds.maj <- predict(cerem.mod.4, newdata = data.frame(Openn = mean(proposal_data$Openn, na.rm = TRUE),
                                                                  Extra = mean(proposal_data$Extra, na.rm = TRUE),
                                                                  Consc = consc_range,
                                                                  Agree = mean(proposal_data$Agree, na.rm = TRUE),
                                                                  Emoti = mean(proposal_data$Emoti, na.rm = TRUE),
                                                                  age = mean(proposal_data$age, na.rm = TRUE),
                                                                  votepct = mean(proposal_data$votepct, na.rm = TRUE),
                                                                  seniority = mean(proposal_data$seniority, na.rm = TRUE),
                                                                  dwnom1 = mean(proposal_data$dwnom1, na.rm = TRUE),
                                                                  majority = 1,
                                                                  power = 0,
                                                                  female = 0,
                                                                  speaker = 0,
                                                                  chair = 0,
                                                                  subchr = 0,
                                                                  maj_leader = 0,
                                                                  min_leader = 0), se = TRUE)

consc.cerem.preds.min <- predict(cerem.mod.4, newdata = data.frame(Openn = mean(proposal_data$Openn, na.rm = TRUE),
                                                                  Extra = mean(proposal_data$Extra, na.rm = TRUE),
                                                                  Consc = consc_range,
                                                                  Agree = mean(proposal_data$Agree, na.rm = TRUE),
                                                                  Emoti = mean(proposal_data$Emoti, na.rm = TRUE),
                                                                  age = mean(proposal_data$age, na.rm = TRUE),
                                                                  votepct = mean(proposal_data$votepct, na.rm = TRUE),
                                                                  seniority = mean(proposal_data$seniority, na.rm = TRUE),
                                                                  dwnom1 = mean(proposal_data$dwnom1, na.rm = TRUE),
                                                                  majority = 0,
                                                                  power = 0,
                                                                  female = 0,
                                                                  speaker = 0,
                                                                  chair = 0,
                                                                  subchr = 0,
                                                                  maj_leader = 0,
                                                                  min_leader = 0), se = TRUE)

consc.cerem.plotdata.maj <- data.frame(low = plogis(consc.cerem.preds.maj$fit - qnorm(0.95)*consc.cerem.preds.maj$se),
                                       mid = plogis(consc.cerem.preds.maj$fit),
                                       high = plogis(consc.cerem.preds.maj$fit + qnorm(0.95)*consc.cerem.preds.maj$se), 
                                       consc = seq(-2, 2, length.out = length(consc_range)),
                                       majority = 1)         

consc.cerem.plotdata.min <- data.frame(low = plogis(consc.cerem.preds.min$fit - qnorm(0.95)*consc.cerem.preds.min$se),
                                       mid = plogis(consc.cerem.preds.min$fit),
                                       high = plogis(consc.cerem.preds.min$fit + qnorm(0.95)*consc.cerem.preds.min$se), 
                                       consc = seq(-2, 2, length.out = length(consc_range)),
                                       majority = 0)         

consc.cerem.plotdata <- rbind(consc.cerem.plotdata.min, consc.cerem.plotdata.maj)
consc.cerem.plotdata$majority <- factor(consc.cerem.plotdata$majority, labels = c("Minority", "Majority"))
                      
                             
consc.cerem.plot <- ggplot(consc.cerem.plotdata, aes(x = consc, y = mid)) + 
                    geom_line(aes(linetype = majority), size = 0.75) + 
                    geom_ribbon(aes(ymin = low, ymax = high, linetype = NA, fill = majority), alpha = 0.6) + 
                    theme_bw() + 
                    ylim(0, .15) +
                    xlab("\nConscientiousness\n(Number of Standard Deviations from Mean)") + 
                    scale_x_continuous(breaks = c(-2, -1, 0, 1, 2)) + 
                    scale_colour_discrete(name = "Party Status") +
                    scale_linetype_discrete(name = "Party Status") + 
                    scale_fill_grey(name = "Party Status", start = 0, end = 0.6) + 
                    ylab("Predicted Proportion of Ceremonial Bill Proposals\n")



# FIGURE 6
pdf("conscceremonialplot.pdf", height = 4.5, width = 6.75)
consc.cerem.plot
dev.off()

